Bayesian GLMM Part1

Author

Murray Logan

Published

07/07/2025

1 Preparations

Load the necessary libraries

## Load the packages required for data wrangling, Bayesian model fitting,
## MCMC diagnostics, posterior summaries and plotting.
library(tidyverse) # for data wrangling etc
library(rstanarm) # for fitting models in STAN
library(cmdstanr) # for cmdstan
library(brms) # for fitting models in STAN
library(standist) # for exploring distributions
library(HDInterval) # for HPD intervals
library(posterior) # for posterior draws
library(coda) # for diagnostics
library(bayesplot) # for diagnostics
library(ggmcmc) # for diagnostics
library(rstan) # for interfacing with STAN
library(DHARMa) # for residual diagnostics
library(emmeans) # for marginal means etc
library(broom) # for tidying outputs
library(broom.mixed) # for tidying MCMC outputs
library(tidybayes) # for more tidying outputs
library(ggeffects) # for partial plots
library(patchwork) # for multiple figures
library(bayestestR) # for ROPE
library(see) # for some plots
library(ggridges) # for ridge plots
library(easystats) # framework for stats, modelling and visualisation
library(modelsummary) # for data and model summary tables
source("helperFunctions.R") # local helpers (presumably defines SUYR_prior_and_posterior() used below - confirm)

2 Scenario

A plant pathologist wanted to examine the effects of two different strengths of tobacco virus on the number of lesions on tobacco leaves. She knew from pilot studies that leaves were inherently very variable in response to the virus. In an attempt to account for this leaf to leaf variability, both treatments were applied to each leaf. Eight individual leaves were divided in half, with half of each leaf inoculated with weak strength virus and the other half inoculated with strong virus. So the leaves were blocks and each treatment was represented once in each block. A completely randomised design would have had 16 leaves, with 8 whole leaves randomly allocated to each treatment.

Figure 1: Tobacco plant
Table 1: Format of tobacco.csv data files
LEAF TREATMENT NUMBER
1 Strong 35.898
1 Weak 25.02
2 Strong 34.118
2 Weak 23.167
3 Strong 35.702
3 Weak 24.122
... ... ...
Table 2: Description of the variables in the tobacco data file
LEAF The blocking factor - Factor B
TREATMENT Categorical representation of the strength of the tobacco virus - main factor of interest Factor A
NUMBER Number of lesions on that part of the tobacco leaf - response variable

3 Read in the data

tobacco <- read_csv("../data/tobacco.csv", trim_ws = TRUE)
Rows: 16 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): LEAF, TREATMENT
dbl (1): NUMBER

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(tobacco)
Rows: 16
Columns: 3
$ LEAF      <chr> "L1", "L1", "L2", "L2", "L3", "L3", "L4", "L4", "L5", "L5", …
$ TREATMENT <chr> "Strong", "Weak", "Strong", "Weak", "Strong", "Weak", "Stron…
$ NUMBER    <dbl> 35.89776, 25.01984, 34.11786, 23.16740, 35.70215, 24.12191, …
## Explore the first 6 rows of the data
head(tobacco)
# A tibble: 6 × 3
  LEAF  TREATMENT NUMBER
  <chr> <chr>      <dbl>
1 L1    Strong      35.9
2 L1    Weak        25.0
3 L2    Strong      34.1
4 L2    Weak        23.2
5 L3    Strong      35.7
6 L3    Weak        24.1
str(tobacco)
spc_tbl_ [16 × 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ LEAF     : chr [1:16] "L1" "L1" "L2" "L2" ...
 $ TREATMENT: chr [1:16] "Strong" "Weak" "Strong" "Weak" ...
 $ NUMBER   : num [1:16] 35.9 25 34.1 23.2 35.7 ...
 - attr(*, "spec")=
  .. cols(
  ..   LEAF = col_character(),
  ..   TREATMENT = col_character(),
  ..   NUMBER = col_double()
  .. )
 - attr(*, "problems")=<externalptr> 
tobacco |> datawizard::data_codebook()
tobacco (16 rows and 3 variables, 3 shown)

ID | Name      | Type      | Missings |         Values |         N
---+-----------+-----------+----------+----------------+----------
1  | LEAF      | character | 0 (0.0%) |             L1 | 2 (12.5%)
   |           |           |          |             L2 | 2 (12.5%)
   |           |           |          |             L3 | 2 (12.5%)
   |           |           |          |             L4 | 2 (12.5%)
   |           |           |          |             L5 | 2 (12.5%)
   |           |           |          |             L6 | 2 (12.5%)
   |           |           |          |             L7 | 2 (12.5%)
   |           |           |          |             L8 | 2 (12.5%)
---+-----------+-----------+----------+----------------+----------
2  | TREATMENT | character | 0 (0.0%) |         Strong | 8 (50.0%)
   |           |           |          |           Weak | 8 (50.0%)
---+-----------+-----------+----------+----------------+----------
3  | NUMBER    | numeric   | 0 (0.0%) | [20.57, 44.72] |        16
------------------------------------------------------------------
tobacco |> modelsummary::datasummary_skim()
Unique Missing Pct. Mean SD Min Median Max Histogram
NUMBER 16 0 31.0 6.5 20.6 33.1 44.7
N %
LEAF L1 2 12.5
L2 2 12.5
L3 2 12.5
L4 2 12.5
L5 2 12.5
L6 2 12.5
L7 2 12.5
L8 2 12.5
TREATMENT Strong 8 50.0
Weak 8 50.0
tobacco |> modelsummary::datasummary_skim(by = "TREATMENT")
TREATMENT Unique Missing Pct. Mean SD Min Median Max Histogram
NUMBER Strong 8 0 34.9 5.1 26.2 34.9 44.7
Weak 8 0 27.1 5.5 20.6 25.1 36.0
N %
LEAF L1 2 12.5
L2 2 12.5
L3 2 12.5
L4 2 12.5
L5 2 12.5
L6 2 12.5
L7 2 12.5
L8 2 12.5
TREATMENT Strong 8 50.0
Weak 8 50.0

4 Exploratory data analysis

To explore the assumptions of homogeneity of variance and normality, a boxplot of each Treatment level is appropriate.

## Boxplots of lesion counts per treatment level - a quick check of
## normality and homogeneity of variance.
ggplot(tobacco, aes(y = NUMBER, x = TREATMENT)) +
  geom_boxplot()

Conclusions:

  • both normality and homogeneity of variance seem satisfied

It can also be useful to get a sense of the consistency across blocks (LEAF). That is, do all Leaves have a similar baseline level of lesion susceptibility and do they respond similarly to the treatment.

## Lesion counts per leaf (block), one line type per treatment, to assess
## block-to-block consistency.
## NOTE(review): as.numeric(LEAF) assumes LEAF is a factor; applied to the
## character column read in above it would yield NAs - confirm LEAF was
## converted to a factor beforehand.
ggplot(tobacco, aes(y = NUMBER, x = as.numeric(LEAF))) +
  geom_line(aes(linetype = TREATMENT))

## If we want to retain the original LEAF labels
ggplot(tobacco, aes(y = NUMBER, x = as.numeric(LEAF))) +
  geom_blank(aes(x = LEAF)) + # sets up the axis with the original LEAF labels
  geom_line(aes(linetype = TREATMENT))

Conclusions:

  • it is clear that some leaves are more susceptible to lesions (e.g. Leaf 7) than other leaves (e.g. Leaf 4)
  • most leaves (other than Leaf 4 and 6) have a similar response to the Treatments - that is most have higher number of lesions from the Strong Treatment than the Weak Treatment.

Given that there are only two levels of Treatment (Strong and Weak), it might be easier to visualise the differences in baselines and effect consistency by plotting as:

## One line per leaf joining its two treatment halves - visualises both the
## varying baselines (intercepts) and the consistency of the treatment effect.
## NOTE(review): as.numeric(TREATMENT) assumes TREATMENT is a factor - confirm.
ggplot(tobacco, aes(y = NUMBER, x = TREATMENT, group = LEAF)) +
  geom_point() +
  geom_line(aes(x = as.numeric(TREATMENT)))

Conclusions:

  • this figure reiterates the points made earlier about the varying baselines and effect consistency.

The above figure also serves as a good way to visualise certain aspects of mixed effects models. When we fit a mixed effects model that includes a random blocking effect (in this case LEAF), we are indicating that we are allowing there to be a different intercept for each block (LEAF). In the current case, the intercept will represent the first Treatment level (Strong). So the random effect is specifying that the intercept can vary from Leaf to Leaf.

We can think of the model as having two tiers (a hierarchy), where the tiers of the hierarchy represent progressively smaller (typically) spatial scales. In the current example, the largest spatial units are the leaves (blocking factor). Within the leaves, there are the two Treatments (Strong and Weak) and within the Treatments are the individual observations.

From a frequentist perspective, this model might be referred to as a mixed effects model in which the TREATMENT is a ‘fixed’ effect and the LEAF is a ‘random’ effect. Such terminology is inconsistent in a Bayesian context since all parameters are ‘random’. Instead, these would be referred to as population-level and group-level effects respectively. Group-level effects are so called because they are effects that differ within each group (e.g. level of a blocking factor), whereas population effects are those that have been pooled across all groups.

Model formula: \[ \begin{align} y_{i,j} &\sim{} \mathcal{N}(\mu_{i,j}, \sigma^2)\\ \mu_{i,j} &=\beta_0 + \bf{Z_j}\boldsymbol{\gamma_j} + \bf{X_i}\boldsymbol{\beta} \\ \beta_0 &\sim{} \mathcal{N}(35, 20)\\ \beta_1 &\sim{} \mathcal{N}(0, 10)\\ \boldsymbol{\gamma_j} &\sim{} \mathcal{N}(0, \boldsymbol{\Sigma})\\ \boldsymbol{\Sigma} &= \boldsymbol{D}({\sigma_l})\boldsymbol{\Omega}\boldsymbol{D}({\sigma_l})\\ \boldsymbol{\Omega} &\sim{} LKJ(\zeta)\\ \sigma_j^2 &\sim{} \mathcal{Cauchy}(0,5)\\ \sigma^2 &\sim{} \mathcal{Gamma}(2,1)\\ \end{align} \]

where:

  • \(\bf{X}\) is the model matrix representing the overall intercept and effects of the treatment on the number of lesions.
  • \(\boldsymbol{\beta}\) is a vector of the population-level effects parameters to be estimated.
  • \(\boldsymbol{\gamma}\) is a vector of the group-level effect parameters
  • \(\bf{Z}\) represents a cell means model matrix for the random intercepts (and possibly random slopes) associated with leaves.
  • the population-level intercept (\(\beta_0\)) has a gaussian prior with location of 35 and scale of 20
  • the population-level effect (\(\beta_1\)) has a gaussian prior with location of 0 and scale of 10
  • the group-level effects are assumed to sum-to-zero and be drawn from a gaussian distribution with mean of 0 and covariance of \(\Sigma\)
  • \(\boldsymbol{\Sigma}\) is the variance-covariance matrix between the groups (individual leaves). It turns out that it is difficult to apply a prior on this covariance matrix, so instead, the covariance matrix is decomposed into a correlation matrix (\(\boldsymbol{\Omega}\)) and a vector of variances (\(\boldsymbol{\sigma_l}\)) which are the diagonals (\(\boldsymbol{D}\)) of the covariance matrix.
  • \(\boldsymbol{\Omega}\) is the correlation matrix obtained from the decomposition \(\boldsymbol{\Sigma} \rightarrow (\boldsymbol{\Omega}, \boldsymbol{\tau})\) of the covariance matrix \(\boldsymbol{\Sigma}\) into a correlation matrix (\(\boldsymbol{\Omega}\)) and a vector of standard deviations (\(\boldsymbol{\tau}\)).

It turns out that it is difficult to apply a prior on a covariance matrix, so instead, we decompose the covariance matrix into a correlation matrix and variance.

https://jrnold.github.io/bayesian_notes/appendix.html - Section 20.15.3 Covariance-Correlation Matrix Decomposition

  • Covariance matrix can be decomposed into a correlation matrix and a vector of variances

  • The variances can be further decomposed into the product of a simplex vector (which is a probability vector, non-negative and sums to 1) and the trace (product of the order of the matrix and the scale of the scale parameter, also the sum of its diagonal elements) of a matrix. Each element of the simplex vector represents the proportion of the trace that is attributable to the corresponding variable.

  • A prior on all the above is a decov (decomposition of covariance) function

  • The prior on the correlation matrix is called LKJ

  • density is proportional to the determinant of the correlation matrix raised to the power of the positive regularization parameter minus one.

  • The prior on the simplex vector is a symmetric Dirichlet prior, which has a single (positive) concentration parameter. The default value of 1 implies that the prior is jointly uniform over the space of simplex vectors of that size.

  • The positive scale parameter has a gamma prior (with default shape and scale of 1 - implying a unit-exponential distribution)

  • alternatively, the lkj prior can be used for covariance.

  • as with decov, it decomposes into correlation and variances, however the variances are not further decomposed into a simplex vector and trace.

  • instead the standard deviations (the square roots of the variances) for each of the group-specific parameters are given half Student-t distributions with scale and df parameters specified through the scale (default 10) and df (default 1) arguments of the lkj function.

  • the lkj prior is similar, yet faster than decov

5 Fit the model

In rstanarm, the default priors are designed to be weakly informative. They are chosen to provide moderate regularisation (to help prevent overfitting) and help stabilise the computations.

## Fit a Gaussian random-intercept model (LEAF as the group-level blocking
## factor) using rstanarm's default, auto-scaled weakly-informative priors.
tobacco.rstanarm <- stan_glmer(NUMBER ~ (1 | LEAF) + TREATMENT,
  data = tobacco,
  family = gaussian(),
  iter = 5000, # total iterations per chain (including warmup)
  warmup = 2000, # warmup (discarded) iterations per chain
  chains = 3, # number of MCMC chains
  thin = 5, # retain every 5th draw
  refresh = 0 # suppress sampling progress output
)
tobacco.rstanarm |> prior_summary()
Priors for model 'tobacco.rstanarm' 
------
Intercept (after predictors centered)
  Specified prior:
    ~ normal(location = 31, scale = 2.5)
  Adjusted prior:
    ~ normal(location = 31, scale = 16)

Coefficients
  Specified prior:
    ~ normal(location = 0, scale = 2.5)
  Adjusted prior:
    ~ normal(location = 0, scale = 32)

Auxiliary (sigma)
  Specified prior:
    ~ exponential(rate = 1)
  Adjusted prior:
    ~ exponential(rate = 0.15)

Covariance
 ~ decov(reg. = 1, conc. = 1, shape = 1, scale = 1)
------
See help('prior_summary.stanreg') for more details

This tells us:

  • for the intercept (when the family is Gaussian), a normal prior with a mean of 31 and a standard deviation of 16 is used. The 2.5 is used for scaling all parameter standard deviations. The value of 31 is based on the mean of the response (mean(tobacco$NUMBER)) and the scaled standard deviation of 16 is based on multiplying the scaling factor by the standard deviation of the response.
2.5 * sd(tobacco$NUMBER)
[1] 16.35115
  • for the coefficients (in this case, just the difference between strong and weak inoculation), the default prior is a normal prior centered around 0 with a standard deviation of 2.5. This is then adjusted for the scale of the data by multiplying the 2.5 by the ratio of the standard deviation of the response to the standard deviation of the numerical dummy variables for the predictor (then rounded).
2.5 * sd(tobacco$NUMBER) / apply(model.matrix(~TREATMENT, tobacco), 2, sd)
  (Intercept) TREATMENTWeak 
          Inf      31.66386 
  • the auxiliary prior represents whatever additional prior is required for the nominated model. In the case of a Gaussian model, this will be \(\sigma\), for negative binomial, it will be the reciprocal of dispersion, for gamma, it will be shape, etc. By default in rstanarm, this is an exponential with a rate of 1 which is then adjusted by division by the standard deviation of the response.
1 / sd(tobacco$NUMBER)
[1] 0.1528945

Lets now run with priors only so that we can explore the range of values they allow in the posteriors.

## Refit sampling from the priors only (prior predictive) to explore the
## range of values the default priors permit, overlaying the raw data.
tobacco.rstanarm1 <- update(tobacco.rstanarm, prior_PD = TRUE)
ggpredict(tobacco.rstanarm1) |> plot(show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

Conclusions:

  • we see that the range of predictions is fairly wide and the predicted means could range from a small negative number to a relatively large positive number.

The following link provides some guidance about defining priors. [https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations]

When defining our own priors, we typically do not want them to be scaled.

If we wanted to define our own priors that were less vague, yet still not likely to bias the outcomes, we could try the following priors (mainly plucked out of thin air):

  • \(\beta_0\): normal centered at 35 with a standard deviation of 7
    • mean of 35: since approximately the median(tobacco$NUMBER) (33.1, rounded up)
    • sd of 7: since mad(tobacco$NUMBER)
  • \(\beta_1\): normal centred at 0 with a standard deviation of 13
    • sd of 13: since sd(tobacco$NUMBER) / apply(model.matrix(~TREATMENT, tobacco), 2, sd)
  • \(\sigma\): exponential with rate of 0.15
    • since 1 / sd(tobacco$NUMBER)
  • \(\Sigma\): decov with:
    • regularization: the exponent for a LKJ prior on the correlation matrix. A value of 1 (default) implies a joint uniform prior
    • concentration: the concentration parameter for a symmetric Dirichlet distribution. A value of 1 (default) implies a joint uniform distribution
    • shape and scale: the shape and scale parameters for a gamma prior on the scale and scale parameters of the decov prior. A value of 1 for both (default) simplifies the gamma prior to a unit-exponential distribution.

I will also overlay the raw data for comparison.

## Refit with explicit (unscaled) weakly-informative priors, sampling from
## the priors only (prior_PD = TRUE) to inspect the prior predictive range.
tobacco.rstanarm2 <- stan_glmer(NUMBER ~ (1 | LEAF) + TREATMENT,
  data = tobacco,
  family = gaussian(),
  prior_intercept = normal(35, 7, autoscale = FALSE), # beta_0 prior
  prior = normal(0, 13, autoscale = FALSE), # beta_1 (treatment effect) prior
  prior_aux = rstanarm::exponential(0.15, autoscale = FALSE), # sigma prior
  prior_covariance = decov(1, 1, 1, 1), # group-level covariance prior (all defaults)
  prior_PD = TRUE, # sample from the priors only (ignore the data likelihood)
  iter = 5000,
  warmup = 1000,
  chains = 3,
  thin = 5,
  refresh = 0
)
tobacco.rstanarm2 |>
  ggpredict() |>
  plot(show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

Now lets refit, conditioning on the data.

## Refit conditioned on the data (prior_PD = FALSE) and compare marginal
## prior vs posterior densities for each parameter.
tobacco.rstanarm3 <- update(tobacco.rstanarm2, prior_PD = FALSE)
posterior_vs_prior(tobacco.rstanarm3,
  color_by = "vs", group_by = TRUE,
  facet_args = list(scales = "free_y") # allow each facet its own y scale
)

Drawing from prior...

Conclusions:

  • in each case, the prior is substantially wider than the posterior, suggesting that the posterior is not biased towards the prior.
ggemmeans(tobacco.rstanarm3, ~TREATMENT) |> plot(show_data = TRUE)
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

ggpredict(tobacco.rstanarm3, ~TREATMENT) |> plot(show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

In brms, the default priors are designed to be weakly informative. They are chosen to provide moderate regularisation (to help prevent overfitting) and help stabilise the computations.

Unlike rstanarm, brms models must be compiled before they start sampling. For most models, the compilation of the stan code takes around 45 seconds.

## Random-intercept model formula for brms.
tobacco.form <- bf(NUMBER ~ (1|LEAF) + TREATMENT,
                   family = gaussian() 
                   )
options(width=100) # widen console output so the prior table does not wrap
tobacco.form |> get_prior(data=tobacco) # inspect brms' default priors for this model
                   prior     class          coef group resp dpar nlpar lb ub       source
                  (flat)         b                                                default
                  (flat)         b TREATMENTWeak                             (vectorized)
 student_t(3, 33.1, 6.5) Intercept                                                default
    student_t(3, 0, 6.5)        sd                                      0         default
    student_t(3, 0, 6.5)        sd                LEAF                  0    (vectorized)
    student_t(3, 0, 6.5)        sd     Intercept  LEAF                  0    (vectorized)
    student_t(3, 0, 6.5)     sigma                                      0         default
## tobacco.brm <- brm(tobacco.form,
##                   iter = 5000,
##                   warmup = 1000,
##                   chains = 3,
##                   thin = 5,
##                   refresh = 0)

The following link provides some guidance about defining priors. [https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations]

When defining our own priors, we typically do not want them to be scaled.

If we wanted to define our own priors that were less vague, yet still not likely to bias the outcomes, we could try the following priors (mainly plucked out of thin air):

  • \(\beta_0\): normal centered at 31 with a standard deviation of 7
    • mean of 31: since mean(tobacco$NUMBER)
    • sd of 7: since sd(tobacco$NUMBER)
  • \(\beta_1\): normal centred at 0 with a standard deviation of 13
    • sd of 13: since sd(tobacco$NUMBER) / apply(model.matrix(~TREATMENT, tobacco), 2, sd)
  • \(\sigma\): half-cauchy with parameters 0 and 6.5
    • since sd(tobacco$NUMBER)
  • \(\sigma_j\): half-cauchy with parameters 0 and 2.5
    • since sqrt(sd(tobacco$NUMBER))
    • we want this prior to have most mass close to zero for the purpose of regularisation
  • \(\Sigma\): decov with:
    • regularization: the exponent for a LKJ prior on the correlation matrix. A value of 1 (default) implies a joint uniform prior
    • concentration: the concentration parameter for a symmetric Dirichlet distribution. A value of 1 (default) implies a joint uniform distribution
    • shape and scale: the shape and scale parameters for a gamma prior on the scale and scale parameters of the decov prior. A value of 1 for both (default) simplifies the gamma prior to a unit-exponential distribution.

Note, for hierarchical models, the model will tend to want to have a large \(\sigma_j\) in order to fit the data better. It is a good idea to regularise this tendency by applying a prior that has most mass around zero. Suitable candidates include:

  • half-t: as the degrees of freedom approach infinity, this will approach a half-normal
  • half-cauchy: this is essentially a half-t with 1 degree of freedom
  • exponential

I will also overlay the raw data for comparison.

## Treatment-level medians and MADs of the response - used to inform the
## location and scale of the priors chosen below.
tobacco |>
  group_by(TREATMENT) |>
  summarise(
    median(NUMBER),
    mad(NUMBER)
  )
# A tibble: 2 × 3
  TREATMENT `median(NUMBER)` `mad(NUMBER)`
  <fct>                <dbl>         <dbl>
1 Strong                34.9          2.68
2 Weak                  25.1          3.54
sd(tobacco$NUMBER) # 6.5
[1] 6.540458
## Visualise candidate priors over a sensible range of the response scale.
standist::visualize("normal(35,3)", xlim = c(-10, 100)) # candidate intercept prior

standist::visualize("normal(0, 10)", xlim = c(-20, 20)) # candidate effect prior

## Candidate priors for the variance/sd parameters
standist::visualize(
  "student_t(3,0,10)",
  "gamma(2,1)",
  "gamma(2,0.5)",
  "gamma(5,0.1)",
  "exponential(1)",
  "exponential(0.15)",
  "cauchy(0,6.5)",
  "cauchy(0, 2.5)", # since sqrt(6.5) is approximately 2.5
  "cauchy(0,1)",
  xlim = c(-10, 25)
)

## Explicit priors for the brms fit.
priors <- prior(normal(35, 10), class = "Intercept") + # beta_0
  prior(normal(0, 8), class = "b") + # population-level (treatment) effect
  prior(student_t(3, 0, 5), class = "sigma") + # residual sd
  prior(student_t(3, 0, 5), class = "sd") # group-level sd(s)
## NOTE(review): this formula specifies a random intercept AND random slope
## (TREATMENT | LEAF), although the surrounding text discusses a
## random-intercept model - confirm which was intended.
tobacco.form <- bf(NUMBER ~ (TREATMENT | LEAF) + TREATMENT,
  family = gaussian()
)
tobacco.brm2 <- brm(tobacco.form,
  data = tobacco,
  prior = priors,
  sample_prior = "only", # prior predictive sampling (ignore the data likelihood)
  iter = 5000,
  warmup = 2500,
  chains = 3, cores = 3,
  thin = 5,
  refresh = 0,
  backend = "rstan"
)
Compiling Stan program...
Start sampling

Note in the above model, the output may have included a warning message alerting us to the presence of divergent transitions. Divergent transitions are an indication that the sampler has encountered poor sampling conditions - the more divergent transitions, the more severe the issue.

Typically, divergent transitions are the result of either:

  • a miss-specified model
  • priors that permit the sampler to drift into unsupported areas
  • complex posterior “features” (with high degrees of curvature) for which the sampler was inadequately tuned during the warmup phase

One useful way to diagnose the cause of divergent transitions is to explore a parallel coordinates plot. Each parameter is on the x-axis and each line represents a single MCMC draw. Divergent transitions will be highlighted in red (by default). If lines pinch together at a particular parameter, then it points to that dimension as the cause of divergent transitions.

tobacco.np <- nuts_params(tobacco.brm2) # NUTS sampler diagnostics (incl. divergences)
tobacco.mcmc <- as.array(tobacco.brm2) # draws as an iterations x chains x parameters array
## Parallel coordinates plot - divergent draws are highlighted so that
## "pinch points" implicate particular parameters.
mcmc_parcoord(x = tobacco.mcmc, np = tobacco.np) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

tobacco.brm2 |> mcmc_parcoord(np = tobacco.np) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

tobacco.brm2 |> mcmc_parcoord(regex_pars = "^b.*|^r.*") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Accordingly, these divergent transitions can be addressed by either:

  • reviewing the model structure
  • adopting tighter priors
  • increase the adaptive delta from the default of 0.8 to closer to 1. The adaptive delta defines the average acceptance probability that the sampler should aspire to during the warmup phase. Increasing the adaptive delta results in a smaller step size (and thus fewer divergences and more robust samples) however it will also result in slower sampling speeds.
tobacco.brm2 |>
  ggpredict() |>
  plot(show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

## Refit conditioned on the data; raise adapt_delta and max_treedepth to
## reduce divergent transitions (at the cost of slower sampling).
tobacco.brm3 <- update(tobacco.brm2,
  sample_prior = "yes", # now sample the priors alongside the posterior
  control = list(adapt_delta = 0.99, max_treedepth = 20),
  refresh = 0, cores = 3
)
The desired updates require recompiling the model
Compiling Stan program...
Start sampling
Warning: There were 2 divergent transitions after warmup. See
https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them.
Warning: Examine the pairs() plot to diagnose sampling problems
tobacco.brm3 |>
  ggpredict() |>
  plot(show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

tobacco.brm3 |> get_variables()
 [1] "b_Intercept"                        "b_TREATMENTWeak"                   
 [3] "sd_LEAF__Intercept"                 "sd_LEAF__TREATMENTWeak"            
 [5] "cor_LEAF__Intercept__TREATMENTWeak" "sigma"                             
 [7] "Intercept"                          "r_LEAF[L1,Intercept]"              
 [9] "r_LEAF[L2,Intercept]"               "r_LEAF[L3,Intercept]"              
[11] "r_LEAF[L4,Intercept]"               "r_LEAF[L5,Intercept]"              
[13] "r_LEAF[L6,Intercept]"               "r_LEAF[L7,Intercept]"              
[15] "r_LEAF[L8,Intercept]"               "r_LEAF[L1,TREATMENTWeak]"          
[17] "r_LEAF[L2,TREATMENTWeak]"           "r_LEAF[L3,TREATMENTWeak]"          
[19] "r_LEAF[L4,TREATMENTWeak]"           "r_LEAF[L5,TREATMENTWeak]"          
[21] "r_LEAF[L6,TREATMENTWeak]"           "r_LEAF[L7,TREATMENTWeak]"          
[23] "r_LEAF[L8,TREATMENTWeak]"           "prior_Intercept"                   
[25] "prior_b"                            "prior_sigma"                       
[27] "prior_sd_LEAF"                      "prior_cor_LEAF"                    
[29] "lprior"                             "lp__"                              
[31] "accept_stat__"                      "stepsize__"                        
[33] "treedepth__"                        "n_leapfrog__"                      
[35] "divergent__"                        "energy__"                          
tobacco.brm3 |>
  hypothesis("TREATMENTWeak=0") |>
  plot()

tobacco.brm3 |> SUYR_prior_and_posterior()

While we are here, we might like to explore a random intercept/slope model

## Random intercept/random slope model: a half-cauchy prior on the
## group-level sds and an LKJ prior on the intercept/slope correlation.
priors <- prior(normal(35, 10), class = "Intercept") + # beta_0
  prior(normal(0, 8), class = "b") + # population-level (treatment) effect
  prior(student_t(3, 0, 5), class = "sigma") + # residual sd
  prior(cauchy(0, 5), class = "sd") + # group-level sds (most mass near zero)
  prior(lkj_corr_cholesky(1), class = "cor") # intercept/slope correlation
tobacco.form <- bf(NUMBER ~ (TREATMENT | LEAF) + TREATMENT,
  family = gaussian()
)

tobacco.brm4 <- brm(tobacco.form,
  data = tobacco,
  prior = priors,
  sample_prior = "yes", # sample priors alongside the posterior
  iter = 5000,
  warmup = 1000,
  chains = 3,
  thin = 5,
  refresh = 0,
  control = list(adapt_delta = 0.99), # reduce divergent transitions
  backend = "rstan"
)
Compiling Stan program...
Start sampling
Warning: There were 512 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
Warning: Examine the pairs() plot to diagnose sampling problems
tobacco.brm4 |> get_variables()
 [1] "b_Intercept"                        "b_TREATMENTWeak"                   
 [3] "sd_LEAF__Intercept"                 "sd_LEAF__TREATMENTWeak"            
 [5] "cor_LEAF__Intercept__TREATMENTWeak" "sigma"                             
 [7] "Intercept"                          "r_LEAF[L1,Intercept]"              
 [9] "r_LEAF[L2,Intercept]"               "r_LEAF[L3,Intercept]"              
[11] "r_LEAF[L4,Intercept]"               "r_LEAF[L5,Intercept]"              
[13] "r_LEAF[L6,Intercept]"               "r_LEAF[L7,Intercept]"              
[15] "r_LEAF[L8,Intercept]"               "r_LEAF[L1,TREATMENTWeak]"          
[17] "r_LEAF[L2,TREATMENTWeak]"           "r_LEAF[L3,TREATMENTWeak]"          
[19] "r_LEAF[L4,TREATMENTWeak]"           "r_LEAF[L5,TREATMENTWeak]"          
[21] "r_LEAF[L6,TREATMENTWeak]"           "r_LEAF[L7,TREATMENTWeak]"          
[23] "r_LEAF[L8,TREATMENTWeak]"           "prior_Intercept"                   
[25] "prior_b"                            "prior_sigma"                       
[27] "prior_sd_LEAF"                      "prior_cor_LEAF"                    
[29] "lprior"                             "lp__"                              
[31] "accept_stat__"                      "stepsize__"                        
[33] "treedepth__"                        "n_leapfrog__"                      
[35] "divergent__"                        "energy__"                          
tobacco.brm4 |>
  hypothesis("TREATMENTWeak=0") |>
  plot()

tobacco.brm4 |> SUYR_prior_and_posterior()

## Compare prior vs posterior draws for each parameter class (group-level
## r_LEAF terms excluded). Uses as_draws_df() in place of the deprecated
## posterior_samples() (see the warning below).
tobacco.brm4 |>
  as_draws_df() |>
  ## drop the log-posterior and the .chain/.iteration/.draw metadata columns
  dplyr::select(-`lp__`, -starts_with(".")) |>
  pivot_longer(everything(), names_to = "key") |>
  filter(!str_detect(key, "^r")) |> # exclude group-level (r_LEAF[...]) effects
  mutate(
    ## label each draw as coming from the prior or the posterior
    Type = ifelse(str_detect(key, "prior"), "Prior", "Posterior"),
    ## assign each parameter to a facet by class
    Class = case_when(
      str_detect(key, "(^b|^prior).*Intercept$") ~ "Intercept",
      str_detect(key, "b_TREATMENT|prior_b") ~ "TREATMENT",
      str_detect(key, "sd") ~ "sd",
      str_detect(key, "^cor|prior_cor") ~ "cor",
      str_detect(key, "sigma") ~ "sigma"
    ),
    Par = str_replace(key, "b_", "")
  ) |>
  ggplot(aes(x = Type, y = value, color = Par)) +
  stat_pointinterval(position = position_dodge()) +
  facet_wrap(~Class, scales = "free")
Warning: Method 'posterior_samples' is deprecated. Please see ?as_draws for
recommended alternatives.

We can compare the two models using LOO

(l.1 <- tobacco.brm3 |> loo())
Warning: Found 6 observations with a pareto_k > 0.69 in model 'tobacco.brm3'.
We recommend to run more iterations to get at least about 2200 posterior draws
to improve LOO-CV approximation accuracy.

Computed from 1500 by 16 log-likelihood matrix.

         Estimate  SE
elpd_loo    -49.6 2.5
p_loo         8.9 1.7
looic        99.1 4.9
------
MCSE of elpd_loo is NA.
MCSE and ESS estimates assume MCMC draws (r_eff in [0.4, 0.5]).

Pareto k diagnostic values:
                          Count Pct.    Min. ESS
(-Inf, 0.69]   (good)     10    62.5%   111     
   (0.69, 1]   (bad)       6    37.5%   <NA>    
    (1, Inf)   (very bad)  0     0.0%   <NA>    
See help('pareto-k-diagnostic') for details.
(l.2 <- tobacco.brm4 |> loo())
Warning: Found 4 observations with a pareto_k > 0.7 in model 'tobacco.brm4'. We
recommend to set 'moment_match = TRUE' in order to perform moment matching for
problematic observations.

Computed from 2400 by 16 log-likelihood matrix.

         Estimate  SE
elpd_loo    -49.4 2.6
p_loo        10.3 1.9
looic        98.8 5.3
------
MCSE of elpd_loo is NA.
MCSE and ESS estimates assume MCMC draws (r_eff in [0.2, 0.2]).

Pareto k diagnostic values:
                         Count Pct.    Min. ESS
(-Inf, 0.7]   (good)     12    75.0%   42      
   (0.7, 1]   (bad)       4    25.0%   <NA>    
   (1, Inf)   (very bad)  0     0.0%   <NA>    
See help('pareto-k-diagnostic') for details.
loo_compare(l.1, l.2)
             elpd_diff se_diff
tobacco.brm4  0.0       0.0   
tobacco.brm3 -0.2       0.3   

Whilst the random intercept/slope has a slightly smaller looic, when we consider the standard error (or rather an interval of +- 2.5xSE), there is no support for this more complex model over the simpler random intercept only model. Consequently, we will continue with the random intercept model.

6 MCMC sampling diagnostics

The bayesplot package offers a range of MCMC diagnostics as well as Posterior Predictive Checks (PPC), all of which have a convenient plot() interface. Let's start with the MCMC diagnostics.

See list of available diagnostics by name
available_mcmc()
bayesplot MCMC module:
  mcmc_acf
  mcmc_acf_bar
  mcmc_areas
  mcmc_areas_ridges
  mcmc_combo
  mcmc_dens
  mcmc_dens_chains
  mcmc_dens_overlay
  mcmc_hex
  mcmc_hist
  mcmc_hist_by_chain
  mcmc_intervals
  mcmc_neff
  mcmc_neff_hist
  mcmc_nuts_acceptance
  mcmc_nuts_divergence
  mcmc_nuts_energy
  mcmc_nuts_stepsize
  mcmc_nuts_treedepth
  mcmc_pairs
  mcmc_parcoord
  mcmc_rank_ecdf
  mcmc_rank_hist
  mcmc_rank_overlay
  mcmc_recover_hist
  mcmc_recover_intervals
  mcmc_recover_scatter
  mcmc_rhat
  mcmc_rhat_hist
  mcmc_scatter
  mcmc_trace
  mcmc_trace_highlight
  mcmc_violin

Of these, we will focus on:

  • trace: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different shade of blue, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
tobacco.brm3 |> mcmc_plot(type = "trace")

The chains appear well mixed and very similar

  • acf_bar (autocorrelation function): plots the autocorrelation between successive MCMC sample lags for each parameter and each chain
tobacco.brm3 |> mcmc_plot(type = "acf_bar")

There is no evidence of autocorrelation in the MCMC samples

  • rhat_hist: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
tobacco.brm3 |> mcmc_plot(type = "rhat_hist")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

All Rhat values are below 1.05, suggesting the chains have converged.

  • neff_hist (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).

    If the ratios are low, tightening the priors may help.

tobacco.brm2 |> mcmc_plot(type = "neff_hist")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Ratios all very high.

More diagnostics
tobacco.brm3 |> mcmc_plot(type = "combo")

tobacco.brm3 |> mcmc_plot(type = "violin")

The rstan package offers a range of MCMC diagnostics. Let's start with the MCMC diagnostics.

Of these, we will focus on:

  • stan_trace: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different colour, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
tobacco.brm3 |> get_variables()
 [1] "b_Intercept"                        "b_TREATMENTWeak"                   
 [3] "sd_LEAF__Intercept"                 "sd_LEAF__TREATMENTWeak"            
 [5] "cor_LEAF__Intercept__TREATMENTWeak" "sigma"                             
 [7] "Intercept"                          "r_LEAF[L1,Intercept]"              
 [9] "r_LEAF[L2,Intercept]"               "r_LEAF[L3,Intercept]"              
[11] "r_LEAF[L4,Intercept]"               "r_LEAF[L5,Intercept]"              
[13] "r_LEAF[L6,Intercept]"               "r_LEAF[L7,Intercept]"              
[15] "r_LEAF[L8,Intercept]"               "r_LEAF[L1,TREATMENTWeak]"          
[17] "r_LEAF[L2,TREATMENTWeak]"           "r_LEAF[L3,TREATMENTWeak]"          
[19] "r_LEAF[L4,TREATMENTWeak]"           "r_LEAF[L5,TREATMENTWeak]"          
[21] "r_LEAF[L6,TREATMENTWeak]"           "r_LEAF[L7,TREATMENTWeak]"          
[23] "r_LEAF[L8,TREATMENTWeak]"           "prior_Intercept"                   
[25] "prior_b"                            "prior_sigma"                       
[27] "prior_sd_LEAF"                      "prior_cor_LEAF"                    
[29] "lprior"                             "lp__"                              
[31] "accept_stat__"                      "stepsize__"                        
[33] "treedepth__"                        "n_leapfrog__"                      
[35] "divergent__"                        "energy__"                          
## Keep only the population-level effects, the variance components and sigma
pars <- tobacco.brm3 |>
  get_variables() |>
  str_subset("^b_.*|^sigma$|^sd.*")

## Traceplots of the retained parameters from the underlying stanfit
stan_trace(tobacco.brm3$fit, pars = pars)

The chains appear well mixed and very similar

  • stan_ac (autocorrelation function): plots the autocorrelation between successive MCMC sample lags for each parameter and each chain
## Autocorrelation of successive draws for each retained parameter
stan_ac(tobacco.brm3$fit, pars = pars)

There is no evidence of autocorrelation in the MCMC samples

  • stan_rhat: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
tobacco.brm3$fit |> stan_rhat()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

All Rhat values are below 1.05, suggesting the chains have converged.

  • stan_ess (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).

    If the ratios are low, tightening the priors may help.

tobacco.brm2$fit |> stan_ess()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Ratios all very high.

## Posterior density of each retained parameter, one curve per chain
stan_dens(tobacco.brm3$fit, separate_chains = TRUE, pars = pars)

The ggmcmc package also has a set of MCMC diagnostic functions. Let's start with the MCMC diagnostics.

Of these, we will focus on:

  • ggs_traceplot: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different colour, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
tobacco.ggs <- tobacco.brm3 |> ggs(burnin = FALSE, inc_warmup = FALSE)
Warning in custom.sort(D$Parameter): NAs introduced by coercion
Warning in custom.sort(D$Parameter): NAs introduced by coercion
tobacco.ggs |> ggs_traceplot()

The chains appear well mixed and very similar

  • ggs_autocorrelation (autocorrelation function): plots the autocorrelation between successive MCMC sample lags for each parameter and each chain
ggs_autocorrelation(tobacco.ggs)

There is no evidence of autocorrelation in the MCMC samples

  • ggs_Rhat: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
ggs_Rhat(tobacco.ggs)

All Rhat values are below 1.05, suggesting the chains have converged.

  • ggs_effective (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).

    If the ratios are low, tightening the priors may help.

ggs_effective(tobacco.ggs)
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
dplyr 1.1.0.
ℹ Please use `reframe()` instead.
ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
  always returns an ungrouped data frame and adjust accordingly.
ℹ The deprecated feature was likely used in the ggmcmc package.
  Please report the issue at <https://github.com/xfim/ggmcmc/issues/>.

Ratios all very high.

More diagnostics
ggs_crosscorrelation(tobacco.ggs)

ggs_grb(tobacco.ggs)

Posterior predictive checks provide additional diagnostics about the fit of the model. Specifically, they provide a comparison between predictions drawn from the model and the observed data used to train the model.

See list of available diagnostics by name
available_ppc()
bayesplot PPC module:
  ppc_bars
  ppc_bars_grouped
  ppc_boxplot
  ppc_dens
  ppc_dens_overlay
  ppc_dens_overlay_grouped
  ppc_ecdf_overlay
  ppc_ecdf_overlay_grouped
  ppc_error_binned
  ppc_error_hist
  ppc_error_hist_grouped
  ppc_error_scatter
  ppc_error_scatter_avg
  ppc_error_scatter_avg_grouped
  ppc_error_scatter_avg_vs_x
  ppc_freqpoly
  ppc_freqpoly_grouped
  ppc_hist
  ppc_intervals
  ppc_intervals_grouped
  ppc_km_overlay
  ppc_km_overlay_grouped
  ppc_loo_intervals
  ppc_loo_pit_ecdf
  ppc_loo_pit_overlay
  ppc_loo_pit_qq
  ppc_loo_ribbon
  ppc_pit_ecdf
  ppc_pit_ecdf_grouped
  ppc_ribbon
  ppc_ribbon_grouped
  ppc_rootogram
  ppc_scatter
  ppc_scatter_avg
  ppc_scatter_avg_grouped
  ppc_stat
  ppc_stat_2d
  ppc_stat_freqpoly
  ppc_stat_freqpoly_grouped
  ppc_stat_grouped
  ppc_violin_grouped
  • dens_overlay: plots the density distribution of the observed data (black line) overlaid on top of 50 density distributions generated from draws from the model (light blue). Ideally, the 50 realisations should be roughly consistent with the observed data.
tobacco.brm3 |> pp_check(type = "dens_overlay", ndraws = 100)

The model draws appear to be consistent with the observed data.

  • error_scatter_avg: this plots the observed values against the average residuals. Similar to a residual plot, we do not want to see any patterns in this plot. Note, this is not really that useful for models that involve a binomial response
tobacco.brm3 |> pp_check(type = "error_scatter_avg")
Using all posterior draws for ppc type 'error_scatter_avg' by default.

This is not really interpretable

  • intervals: plots the observed data overlaid on top of posterior predictions associated with each level of the predictor. Ideally, the observed data should all fall within the predictive intervals.
tobacco.brm3 |> pp_check(group = "TREATMENT", type = "intervals")
Using all posterior draws for ppc type 'intervals' by default.
Warning: The following arguments were unrecognized and ignored: group

tobacco.brm3 |> pp_check(group = "TREATMENT", type = "intervals_grouped")
Using all posterior draws for ppc type 'intervals_grouped' by default.

tobacco.brm3 |> pp_check(group = "TREATMENT", type = "violin_grouped")
Using all posterior draws for ppc type 'violin_grouped' by default.

The shinystan package allows the full suite of MCMC diagnostics and posterior predictive checks to be accessed via a web interface.

# library(shinystan)
# launch_shinystan(tobacco.brm2)

DHARMa residuals provide very useful diagnostics. Unfortunately, we cannot directly use the simulateResiduals() function to generate the simulated residuals. However, if we are willing to calculate some of the components ourselves, we can still obtain the simulated residuals from the fitted stan model.

We need to supply:

  • simulated (predicted) responses associated with each observation.
  • observed values
  • fitted (predicted) responses (averaged) associated with each observation
## Draw 250 posterior predictive simulations per observation (rows = draws)
preds <- tobacco.brm4 |> posterior_predict(ndraws = 250, summary = FALSE)
## Build DHARMa residuals manually: DHARMa expects simulations with
## observations in rows, hence the transpose of `preds`.
tobacco.resids <- createDHARMa(
  simulatedResponse = t(preds),
  observedResponse = tobacco$NUMBER,
  fittedPredictedResponse = apply(preds, 2, median),
  integerResponse = FALSE
)
## quantreg = FALSE: the sample (n = 16) is too small for quantile regression
plot(tobacco.resids, quantreg = FALSE)

## Alternatively, construct the DHARMa residuals via the helper function
## (defined in helperFunctions.R) directly from the brms fit.
tobacco.resids <- make_brms_dharma_res(tobacco.brm3, integerResponse = FALSE)
## Assemble the four standard DHARMa diagnostic panels with patchwork
wrap_elements(~ testUniformity(tobacco.resids)) +
  wrap_elements(~ plotResiduals(tobacco.resids, form = factor(rep(1, nrow(tobacco))))) +
  wrap_elements(~ plotResiduals(tobacco.resids, quantreg = FALSE)) +
  wrap_elements(~ testDispersion(tobacco.resids))

Conclusions:

  • the simulated residuals do not suggest any issues with the residuals
  • there is no evidence of a lack of fit.

7 Partial effects plots

## Partial (conditional) effects with the raw observations overlaid
plot(conditional_effects(tobacco.brm3), points = TRUE)

## ggeffects-based partial effects plot with the raw data shown
plot(ggpredict(tobacco.brm3), show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

## emmeans-based partial effects; the extra geom_point() overlays the raw
## data (TREATMENT coerced to numeric so it aligns with the ggemmeans x-axis)
tobacco.brm3 |>
  ggemmeans(~TREATMENT) |>
  plot(show_data = TRUE) +
  geom_point(data = tobacco, aes(y = NUMBER, x = as.numeric(TREATMENT)))
Warning in stats::qt(ci, df = dof): NaNs produced
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

## Construct "partial observations": fitted values plus residuals, so the
## observed data can be displayed on the same partial scale as predictions.
Partial.obs <- tobacco.brm3$data |>
  mutate(
    Pred = predict(tobacco.brm3)[, "Estimate"],
    Resid = resid(tobacco.brm3)[, "Estimate"],
    Obs = Pred + Resid
  )

## Summarise the posterior expectation for each treatment and overlay both
## the partial observations (fitted + residual, red) and the raw data.
## NOTE: fitted_draws() is deprecated; epred_draws() is the recommended
## replacement (draws of the expectation of the posterior predictive), and
## it names the drawn column `.epred` rather than `.value`.
tobacco.brm3 |>
  epred_draws(newdata = tobacco) |>
  median_hdci() |>
  ggplot(aes(x = TREATMENT, y = .epred)) +
  geom_pointrange(aes(ymin = .lower, ymax = .upper)) +
  geom_line() +
  geom_point(
    data = Partial.obs, aes(y = Obs, x = TREATMENT), color = "red",
    position = position_nudge(x = 0.1)
  ) +
  geom_point(
    data = tobacco, aes(y = NUMBER, x = TREATMENT),
    position = position_nudge(x = 0.05)
  )
Warning: `fitted_draws` and `add_fitted_draws` are deprecated as their names were confusing.
- Use [add_]epred_draws() to get the expectation of the posterior predictive.
- Use [add_]linpred_draws() to get the distribution of the linear predictor.
- For example, you used [add_]fitted_draws(..., scale = "response"), which
  means you most likely want [add_]epred_draws(...).
NOTE: When updating to the new functions, note that the `model` parameter is now
  named `object` and the `n` parameter is now named `ndraws`.

## Overlay the posterior expectation distributions (orange violins) on the
## raw data (blue violins and jittered points) for each treatment.
tobacco.brm3 |>
  epred_draws(newdata = tobacco) |>
  ggplot() +
  geom_violin(data = tobacco, aes(y = NUMBER, x = TREATMENT), fill = "blue", alpha = 0.2) +
  geom_point(
    data = tobacco, aes(y = NUMBER, x = TREATMENT),
    position = position_jitter(width = 0.1, height = 0)
  ) +
  geom_violin(aes(y = .epred, x = TREATMENT), fill = "orange", alpha = 0.2) +
  theme_bw()

8 Model investigation

The summary() method generates simple summaries (mean, standard deviation as well as 10, 50 and 90 percentiles).

tobacco.brm3 |> summary()
Warning: There were 2 divergent transitions after warmup. Increasing
adapt_delta above 0.99 may help. See
http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: NUMBER ~ (TREATMENT | LEAF) + TREATMENT 
   Data: tobacco (Number of observations: 16) 
  Draws: 3 chains, each with iter = 5000; warmup = 2500; thin = 5;
         total post-warmup draws = 1500

Multilevel Hyperparameters:
~LEAF (Number of levels: 8) 
                             Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
sd(Intercept)                    3.53      2.11     0.20     8.21 1.01     1002
sd(TREATMENTWeak)                3.02      2.17     0.12     7.88 1.00     1027
cor(Intercept,TREATMENTWeak)    -0.03      0.55    -0.92     0.92 1.00     1517
                             Tail_ESS
sd(Intercept)                    1262
sd(TREATMENTWeak)                1403
cor(Intercept,TREATMENTWeak)     1409

Regression Coefficients:
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept        34.70      2.02    30.75    38.64 1.00     1308     1422
TREATMENTWeak    -7.16      2.37   -11.59    -2.27 1.00     1495     1457

Further Distributional Parameters:
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     4.08      1.38     1.46     6.96 1.01      798      603

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
Warning: There were 2 divergent transitions after warmup. Increasing
adapt_delta above 0.99 may help. See
http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup

Conclusions:

  • the intercept indicates that the Strong treatment has an average of 34.7 lesions.
  • the Weak treatment has 7.16 fewer lesions.
  • the standard deviation of intercepts across all Leaves is 3.53
  • the scale of variation between Leaves (sd, 3.53) is very similar to the variation within Leaves (sigma, 4.08).
## Tidy posterior summaries: medians, HPD intervals, exceedance
## probabilities and convergence diagnostics for every parameter.
tobacco.brm3 |>
  summarise_draws(
    median,
    ~ HDInterval::hdi(.x),
    N = ~ length(.x), # number of posterior draws
    Pl = ~ mean(.x < 0), # probability the parameter is negative
    Pg = ~ mean(.x > 0), # probability the parameter is positive
    rhat,
    ess_bulk,
    ess_tail
  )
# A tibble: 30 × 10
   variable     median    lower  upper     N    Pl    Pg  rhat ess_bulk ess_tail
   <chr>         <dbl>    <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
 1 b_Intercept 34.7     3.07e+1 38.6    1500 0     1     0.999    1308.    1422.
 2 b_TREATMEN… -7.27   -1.16e+1 -2.26   1500 0.994 0.006 1.00     1495.    1457.
 3 sd_LEAF__I…  3.38    1.08e-2  7.20   1500 0     1     1.01     1002.    1262.
 4 sd_LEAF__T…  2.68    1.05e-3  7.06   1500 0     1     1.00     1028.    1403.
 5 cor_LEAF__… -0.0660 -9.12e-1  0.927  1500 0.539 0.461 1.00     1516.    1409.
 6 sigma        3.97    1.25e+0  6.68   1500 0     1     1.01      797.     603.
 7 Intercept   31.1     2.73e+1 34.5    1500 0     1     1.00     1393.    1422.
 8 r_LEAF[L1,…  0.0254 -4.81e+0  5.13   1500 0.494 0.506 1.00     1492.    1391.
 9 r_LEAF[L2,… -0.554  -6.49e+0  3.49   1500 0.639 0.361 1.00     1585.    1537.
10 r_LEAF[L3,… -0.156  -5.42e+0  4.45   1500 0.544 0.456 1.01     1497.    1380.
# ℹ 20 more rows
## or if you want to exclude some parameters
tobacco.brm3 |>
  summarise_draws(
    median,
    ~ HDInterval::hdi(.x),
    rhat,
    ess_bulk,
    ess_tail
  ) |>
  filter(str_detect(variable, "prior|^r_|^lp__", negate = TRUE))
# A tibble: 7 × 7
  variable                        median    lower  upper  rhat ess_bulk ess_tail
  <chr>                            <dbl>    <dbl>  <dbl> <dbl>    <dbl>    <dbl>
1 b_Intercept                    34.7     3.07e+1 38.6   0.999    1308.    1422.
2 b_TREATMENTWeak                -7.27   -1.16e+1 -2.26  1.00     1495.    1457.
3 sd_LEAF__Intercept              3.38    1.08e-2  7.20  1.01     1002.    1262.
4 sd_LEAF__TREATMENTWeak          2.68    1.05e-3  7.06  1.00     1028.    1403.
5 cor_LEAF__Intercept__TREATMEN… -0.0660 -9.12e-1  0.927 1.00     1516.    1409.
6 sigma                           3.97    1.25e+0  6.68  1.01      797.     603.
7 Intercept                      31.1     2.73e+1 34.5   1.00     1393.    1422.
tobacco.brm3 |> as_draws_df()
# A draws_df: 500 iterations, 3 chains, and 30 variables
   b_Intercept b_TREATMENTWeak sd_LEAF__Intercept sd_LEAF__TREATMENTWeak
1           35            -6.4               3.50                   0.28
2           39            -8.0               3.79                   0.60
3           36            -8.2               7.60                   2.66
4           35            -8.5               3.57                   2.97
5           34            -7.6               1.89                   2.30
6           31            -5.2               0.87                   0.16
7           33            -5.3               0.85                   4.13
8           30             1.2               1.34                   2.83
9           35            -7.4               2.84                   2.39
10          34            -8.6               5.47                   4.04
   cor_LEAF__Intercept__TREATMENTWeak sigma Intercept r_LEAF[L1,Intercept]
1                              -0.205   2.2        32               -0.065
2                              -0.465   7.6        35               -5.363
3                               0.571   3.3        32                3.674
4                               0.433   3.1        31               -0.250
5                               0.141   4.8        31                1.615
6                              -0.028   4.8        28                1.541
7                               0.779   4.5        30                0.529
8                               0.383   7.5        30               -0.511
9                              -0.199   6.7        32                2.429
10                             -0.860   4.6        30                7.619
# ... with 1490 more draws, and 22 more variables
# ... hidden reserved variables {'.chain', '.iteration', '.draw'}
tobacco.brm3 |>
  as_draws_df() |>
  dplyr::select(matches("^b_.*|^sigma$|^sd_.*")) |>
  summarise_draws(
    median,
    ~ HDInterval::hdi(.x),
    Pg = ~ mean(.x > 0),
    Pl = ~ mean(.x < 0),
    rhat,
    ess_bulk,
    ess_tail
  )
Warning: Dropping 'draws_df' class as required metadata was removed.
# A tibble: 5 × 9
  variable             median    lower upper    Pg    Pl  rhat ess_bulk ess_tail
  <chr>                 <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
1 b_Intercept           34.7   3.07e+1 38.6  1     0     1.000    1284.    1414.
2 b_TREATMENTWeak       -7.27 -1.16e+1 -2.26 0.006 0.994 1.000    1484.    1447.
3 sd_LEAF__Intercept     3.38  1.08e-2  7.20 1     0     1.01      917.    1190.
4 sd_LEAF__TREATMENTW…   2.68  1.05e-3  7.06 1     0     1.00     1012.    1380.
5 sigma                  3.97  1.25e+0  6.68 1     0     1.00      778.     597.
## or if you want to exclude some parameters
tobacco.brm3 |>
  as_draws_df() |>
  summarise_draws(
    median,
    ~ HDInterval::hdi(.x),
    rhat,
    ess_bulk,
    ess_tail
  ) |>
  filter(str_detect(variable, "prior|^r_|^lp__", negate = TRUE))
# A tibble: 7 × 7
  variable                        median    lower  upper  rhat ess_bulk ess_tail
  <chr>                            <dbl>    <dbl>  <dbl> <dbl>    <dbl>    <dbl>
1 b_Intercept                    34.7     3.07e+1 38.6   0.999    1308.    1422.
2 b_TREATMENTWeak                -7.27   -1.16e+1 -2.26  1.00     1495.    1457.
3 sd_LEAF__Intercept              3.38    1.08e-2  7.20  1.01     1002.    1262.
4 sd_LEAF__TREATMENTWeak          2.68    1.05e-3  7.06  1.00     1028.    1403.
5 cor_LEAF__Intercept__TREATMEN… -0.0660 -9.12e-1  0.927 1.00     1516.    1409.
6 sigma                           3.97    1.25e+0  6.68  1.01      797.     603.
7 Intercept                      31.1     2.73e+1 34.5   1.00     1393.    1422.
## broom-style tidy summary of the underlying stanfit: medians,
## HPD intervals, Rhat and effective sample sizes.
tobacco.brm3$fit |>
  tidyMCMC(
    estimate.method = "median",
    conf.int = TRUE, conf.method = "HPDinterval",
    rhat = TRUE, ess = TRUE
  )
# A tibble: 29 × 7
   term                        estimate std.error conf.low conf.high  rhat   ess
   <chr>                          <dbl>     <dbl>    <dbl>     <dbl> <dbl> <int>
 1 b_Intercept                  34.7        2.02   3.07e+1    38.6   0.999  1295
 2 b_TREATMENTWeak              -7.16       2.37  -1.16e+1    -2.26  1.000  1471
 3 sd_LEAF__Intercept            3.53       2.11   1.08e-2     7.20  1.01    981
 4 sd_LEAF__TREATMENTWeak        3.02       2.17   1.05e-3     7.06  1.00   1064
 5 cor_LEAF__Intercept__TREAT…  -0.0336     0.545 -9.12e-1     0.927 1.00   1546
 6 sigma                         4.08       1.38   1.25e+0     6.68  1.00    835
 7 Intercept                    31.1        1.80   2.73e+1    34.5   1.000  1401
 8 r_LEAF[L1,Intercept]          0.0247     2.40  -4.81e+0     5.13  1.000  1490
 9 r_LEAF[L2,Intercept]         -0.936      2.50  -6.49e+0     3.49  1.00   1611
10 r_LEAF[L3,Intercept]         -0.293      2.41  -5.42e+0     4.45  1.00   1514
# ℹ 19 more rows

Conclusions:

see above

tobacco.brm3 |> get_variables()
 [1] "b_Intercept"                        "b_TREATMENTWeak"                   
 [3] "sd_LEAF__Intercept"                 "sd_LEAF__TREATMENTWeak"            
 [5] "cor_LEAF__Intercept__TREATMENTWeak" "sigma"                             
 [7] "Intercept"                          "r_LEAF[L1,Intercept]"              
 [9] "r_LEAF[L2,Intercept]"               "r_LEAF[L3,Intercept]"              
[11] "r_LEAF[L4,Intercept]"               "r_LEAF[L5,Intercept]"              
[13] "r_LEAF[L6,Intercept]"               "r_LEAF[L7,Intercept]"              
[15] "r_LEAF[L8,Intercept]"               "r_LEAF[L1,TREATMENTWeak]"          
[17] "r_LEAF[L2,TREATMENTWeak]"           "r_LEAF[L3,TREATMENTWeak]"          
[19] "r_LEAF[L4,TREATMENTWeak]"           "r_LEAF[L5,TREATMENTWeak]"          
[21] "r_LEAF[L6,TREATMENTWeak]"           "r_LEAF[L7,TREATMENTWeak]"          
[23] "r_LEAF[L8,TREATMENTWeak]"           "prior_Intercept"                   
[25] "prior_b"                            "prior_sigma"                       
[27] "prior_sd_LEAF"                      "prior_cor_LEAF"                    
[29] "lprior"                             "lp__"                              
[31] "accept_stat__"                      "stepsize__"                        
[33] "treedepth__"                        "n_leapfrog__"                      
[35] "divergent__"                        "energy__"                          
## Gather the fixed effects, variance components and sigma into long format.
## NOTE(review): the `.` in `b.Intercept` is a regex wildcard that happens to
## match the `_` of `b_Intercept` — presumably `b_Intercept` was intended.
tobacco.draw <- tobacco.brm3 |>
  gather_draws(`b.Intercept.*|b_TREAT.*|sd_.*|sigma`, regex = TRUE)
tobacco.draw
# A tibble: 7,500 × 5
# Groups:   .variable [5]
   .chain .iteration .draw .variable   .value
    <int>      <int> <int> <chr>        <dbl>
 1      1          1     1 b_Intercept   35.0
 2      1          2     2 b_Intercept   38.7
 3      1          3     3 b_Intercept   35.6
 4      1          4     4 b_Intercept   35.3
 5      1          5     5 b_Intercept   34.4
 6      1          6     6 b_Intercept   30.9
 7      1          7     7 b_Intercept   33.1
 8      1          8     8 b_Intercept   29.5
 9      1          9     9 b_Intercept   35.2
10      1         10    10 b_Intercept   33.8
# ℹ 7,490 more rows

We can then summarise this

tobacco.draw |> median_hdci()
# A tibble: 5 × 7
  .variable              .value    .lower .upper .width .point .interval
  <chr>                   <dbl>     <dbl>  <dbl>  <dbl> <chr>  <chr>    
1 b_Intercept             34.7   30.7      38.6    0.95 median hdci     
2 b_TREATMENTWeak         -7.27 -12.1      -2.64   0.95 median hdci     
3 sd_LEAF__Intercept       3.38   0.0108    7.18   0.95 median hdci     
4 sd_LEAF__TREATMENTWeak   2.68   0.00105   7.05   0.95 median hdci     
5 sigma                    3.97   1.25      6.68   0.95 median hdci     
## Slab plot of the fixed effects shaded by credible interval width.
## NOTE: stat() was deprecated in ggplot2 3.4.0; after_stat() is the
## recommended replacement (as the warning below also indicates).
tobacco.brm3 |>
  gather_draws(`b_Intercept.*|b_TREAT.*`, regex = TRUE) |>
  ggplot() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  stat_slab(aes(
    x = .value, y = .variable,
    fill = after_stat(ggdist::cut_cdf_qi(cdf,
      .width = c(0.5, 0.8, 0.95),
      labels = scales::percent_format()
    ))
  ), color = "black") +
  scale_fill_brewer("Interval", direction = -1, na.translate = FALSE)
Warning: `stat(ggdist::cut_cdf_qi(cdf, .width = c(0.5, 0.8, 0.95), labels =
scales::percent_format()))` was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(ggdist::cut_cdf_qi(cdf, .width = c(0.5, 0.8, 0.95),
  labels = scales::percent_format()))` instead.

tobacco.brm3 |>
  gather_draws(`.Intercept.*|.*TREAT.*`, regex = TRUE) |>
  ggplot() +
  stat_halfeye(aes(x = .value, y = .variable)) +
  facet_wrap(~.variable, scales = "free")

tobacco.brm3 |>
  gather_draws(`.Intercept.*|.*TREAT.*`, regex = TRUE) |>
  ggplot() +
  stat_halfeye(aes(x = .value, y = .variable)) +
  theme_classic()

tobacco.brm3$fit |> plot(type = "intervals")
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)

tobacco.brm3 |>
  gather_draws(`^b_.*`, regex = TRUE) |>
  filter(.variable != "b_Intercept") |>
  ggplot() +
  stat_halfeye(aes(x = .value, y = .variable)) +
  facet_wrap(~.variable, scales = "free")

tobacco.brm3 |>
  gather_draws(`^b_.*`, regex = TRUE) |>
  filter(.variable != "b_Intercept") |>
  ggplot() +
  stat_halfeye(aes(x = .value, y = .variable)) +
  geom_vline(xintercept = 0, linetype = "dashed")

tobacco.brm3 |>
  gather_draws(`^b_.*`, regex = TRUE) |>
  filter(.variable != "b_Intercept") |>
  ggplot() +
  geom_density_ridges(aes(x = .value, y = .variable), alpha = 0.4) +
  geom_vline(xintercept = 0, linetype = "dashed")
Picking joint bandwidth of 0.48

## Or in colour
tobacco.brm3 |>
  gather_draws(`^b_.*`, regex = TRUE) |>
  filter(.variable != "b_Intercept") |>
  ggplot() +
  geom_density_ridges_gradient(
    aes(
      x = exp(.value),
      y = .variable,
      fill = stat(x)
    ),
    alpha = 0.4, colour = "white",
    quantile_lines = TRUE,
    quantiles = c(0.025, 0.975)
  ) +
  geom_vline(xintercept = 1, linetype = "dashed") +
  scale_x_continuous(trans = scales::log2_trans()) +
  scale_fill_viridis_c(option = "C")
Picking joint bandwidth of 0.692

tobacco.brm3 |> tidy_draws()
# A tibble: 1,500 × 39
   .chain .iteration .draw b_Intercept b_TREATMENTWeak sd_LEAF__Intercept
    <int>      <int> <int>       <dbl>           <dbl>              <dbl>
 1      1          1     1        35.0           -6.37              3.50 
 2      1          2     2        38.7           -7.95              3.79 
 3      1          3     3        35.6           -8.18              7.60 
 4      1          4     4        35.3           -8.50              3.57 
 5      1          5     5        34.4           -7.64              1.89 
 6      1          6     6        30.9           -5.16              0.867
 7      1          7     7        33.1           -5.28              0.848
 8      1          8     8        29.5            1.24              1.34 
 9      1          9     9        35.2           -7.36              2.84 
10      1         10    10        33.8           -8.63              5.47 
# ℹ 1,490 more rows
# ℹ 33 more variables: sd_LEAF__TREATMENTWeak <dbl>,
#   cor_LEAF__Intercept__TREATMENTWeak <dbl>, sigma <dbl>, Intercept <dbl>,
#   `r_LEAF[L1,Intercept]` <dbl>, `r_LEAF[L2,Intercept]` <dbl>,
#   `r_LEAF[L3,Intercept]` <dbl>, `r_LEAF[L4,Intercept]` <dbl>,
#   `r_LEAF[L5,Intercept]` <dbl>, `r_LEAF[L6,Intercept]` <dbl>,
#   `r_LEAF[L7,Intercept]` <dbl>, `r_LEAF[L8,Intercept]` <dbl>, …
tobacco.brm3 |> spread_draws(`.*Intercept.*|.*TREAT.*`, regex = TRUE)
# A tibble: 1,500 × 26
   .chain .iteration .draw b_Intercept b_TREATMENTWeak sd_LEAF__Intercept
    <int>      <int> <int>       <dbl>           <dbl>              <dbl>
 1      1          1     1        35.0           -6.37              3.50 
 2      1          2     2        38.7           -7.95              3.79 
 3      1          3     3        35.6           -8.18              7.60 
 4      1          4     4        35.3           -8.50              3.57 
 5      1          5     5        34.4           -7.64              1.89 
 6      1          6     6        30.9           -5.16              0.867
 7      1          7     7        33.1           -5.28              0.848
 8      1          8     8        29.5            1.24              1.34 
 9      1          9     9        35.2           -7.36              2.84 
10      1         10    10        33.8           -8.63              5.47 
# ℹ 1,490 more rows
# ℹ 20 more variables: sd_LEAF__TREATMENTWeak <dbl>,
#   cor_LEAF__Intercept__TREATMENTWeak <dbl>, Intercept <dbl>,
#   `r_LEAF[L1,Intercept]` <dbl>, `r_LEAF[L2,Intercept]` <dbl>,
#   `r_LEAF[L3,Intercept]` <dbl>, `r_LEAF[L4,Intercept]` <dbl>,
#   `r_LEAF[L5,Intercept]` <dbl>, `r_LEAF[L6,Intercept]` <dbl>,
#   `r_LEAF[L7,Intercept]` <dbl>, `r_LEAF[L8,Intercept]` <dbl>, …
## FIX(review): posterior_samples() is deprecated (see ?as_draws and the
## warning emitted below); as_draws_df() is the recommended replacement.
tobacco.brm3 |>
  as_draws_df() |>
  as_tibble()
Warning: Method 'posterior_samples' is deprecated. Please see ?as_draws for
recommended alternatives.
# A tibble: 1,500 × 30
   b_Intercept b_TREATMENTWeak sd_LEAF__Intercept sd_LEAF__TREATMENTWeak
         <dbl>           <dbl>              <dbl>                  <dbl>
 1        35.0           -6.37              3.50                   0.279
 2        38.7           -7.95              3.79                   0.595
 3        35.6           -8.18              7.60                   2.66 
 4        35.3           -8.50              3.57                   2.97 
 5        34.4           -7.64              1.89                   2.30 
 6        30.9           -5.16              0.867                  0.155
 7        33.1           -5.28              0.848                  4.13 
 8        29.5            1.24              1.34                   2.83 
 9        35.2           -7.36              2.84                   2.39 
10        33.8           -8.63              5.47                   4.04 
# ℹ 1,490 more rows
# ℹ 26 more variables: cor_LEAF__Intercept__TREATMENTWeak <dbl>, sigma <dbl>,
#   Intercept <dbl>, `r_LEAF[L1,Intercept]` <dbl>,
#   `r_LEAF[L2,Intercept]` <dbl>, `r_LEAF[L3,Intercept]` <dbl>,
#   `r_LEAF[L4,Intercept]` <dbl>, `r_LEAF[L5,Intercept]` <dbl>,
#   `r_LEAF[L6,Intercept]` <dbl>, `r_LEAF[L7,Intercept]` <dbl>,
#   `r_LEAF[L8,Intercept]` <dbl>, `r_LEAF[L1,TREATMENTWeak]` <dbl>, …
## Marginal R2 (fixed effects only), summarised as median + 95% HPD interval
bayes_R2(tobacco.brm3, re.form = NA, summary = FALSE) |>
  median_hdci()
          y       ymin      ymax .width .point .interval
1 0.3489184 0.06889028 0.5651569   0.95 median      hdci
## R2 including the random intercept for LEAF
bayes_R2(tobacco.brm3, re.form = ~ (1 | LEAF), summary = FALSE) |>
  median_hdci()
          y      ymin     ymax .width .point .interval
1 0.5841075 0.1484481 0.815303   0.95 median      hdci
## Conditional R2 including random intercepts and slopes for LEAF
bayes_R2(tobacco.brm3, re.form = ~ (TREATMENT | LEAF), summary = FALSE) |>
  median_hdci()
          y      ymin      ymax .width .point .interval
1 0.6526696 0.2792442 0.9878788   0.95 median      hdci

Region of Practical Equivalence

0.1 * sd(tobacco$NUMBER)
[1] 0.6540458
tobacco.brm3 |> rope(range = c(-0.65, 0.65))
# Proportion of samples inside the ROPE [-0.65, 0.65]:

Parameter     | inside ROPE
---------------------------
Intercept     |      0.00 %
TREATMENTWeak |      0.00 %
rope(tobacco.brm3, range = c(-0.65, 0.65)) |> plot()

## Or based on fractional scale
## FIX(review): keep only the Diff column before rope() — otherwise the
## .draw index column is treated as a parameter and reported in the ROPE
## table (the spurious ".draw | 0.00 %" row in the original output).
tobacco.brm3 |>
  emmeans(~TREATMENT) |>
  gather_emmeans_draws() |>
  group_by(.draw) |>
  arrange(desc(TREATMENT)) |>
  summarise(Diff = 100 * (exp(diff(log(.value))) - 1)) |>
  dplyr::select(Diff) |>
  rope(range = c(-10, 10))
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
# Proportion of samples inside the ROPE [-10.00, 10.00]:

Parameter | inside ROPE
-----------------------
.draw     |      0.00 %
Diff      |      1.62 %
## Coefficient table with 95% credible intervals.
## NOTE(review): the emmeans below (~27-35) suggest this model is fitted on
## the identity scale, so exponentiate = TRUE produces implausibly large
## values (Intercept ~1.2e15 in the output) — confirm the intended scale.
tobacco.brm3 |> modelsummary(
  statistic = c("conf.low", "conf.high"),
  shape = term ~ statistic,
  exponentiate = TRUE
)
Warning: 
`modelsummary` uses the `performance` package to extract goodness-of-fit
statistics from models of this class. You can specify the statistics you wish
to compute by supplying a `metrics` argument to `modelsummary`, which will then
push it forward to `performance`. Acceptable values are: "all", "common",
"none", or a character vector of metrics names. For example: `modelsummary(mod,
metrics = c("RMSE", "R2")` Note that some metrics are computationally
expensive. See `?performance::performance` for details.
 This warning appears once per session.
(1)
Est. 2.5 % 97.5 %
b_Intercept 1.211229e+15 2.271382e+13 6.026902e+16
b_TREATMENTWeak 1.000000e-03 0.000000e+00 1.040000e-01
sd_LEAF__Intercept 2.942100e+01 1.226000e+00 3.661824e+03
sd_LEAF__TREATMENTWeak 1.458000e+01 1.125000e+00 2.643213e+03
cor_LEAF__Intercept__TREATMENTWeak 9.360000e-01 3.990000e-01 2.520000e+00
sigma 5.323000e+01 4.296000e+00 1.055628e+03
Num.Obs. 16
R2 0.653
R2 Adj. 0.464
R2 Marg. 0.349
ICC 1.0
ELPD -49.6
ELPD s.e. 2.5
LOOIC 99.1
LOOIC s.e. 4.9
WAIC 97.3
RMSE 2.73
r2.adjusted.marginal -0.139874119550532
tobacco.brm3 |> modelplot(exponentiate = TRUE)

9 Further investigations

## Percentage difference (Strong vs Weak) per posterior draw, with
## convergence diagnostics on the derived quantity.
tobacco.brm3 |>
  emmeans(~TREATMENT) |> # posterior expected means per treatment
  gather_emmeans_draws() |> # one row per draw per treatment level
  group_by(.draw) |>
  arrange(desc(TREATMENT)) |> # Weak first, Strong second within each draw
  summarise(Diff = 100 * (exp(diff(log(.value))) - 1)) |> # % change: 100*(Strong/Weak - 1)
  summarise_draws(
    median,
    ~ HDInterval::hdi(.x),
    rhat,
    ess_bulk,
    ess_tail
  )
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
# A tibble: 1 × 7
  variable median lower upper  rhat ess_bulk ess_tail
  <chr>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
1 Diff       26.5  6.18  45.4 1.000    1494.    1376.
## Or via gather and pivot
## One column per treatment (per draw), then derive absolute and percentage
## effects draw-wise.
newdata <- tobacco.brm3 |>
  emmeans(~TREATMENT) |>
  gather_emmeans_draws() |>
  pivot_wider(names_from = TREATMENT, values_from = .value) |> # Strong/Weak columns
  mutate(
    Eff = Strong - Weak, # absolute effect per draw
    PEff = 100 * Eff / Weak # percentage effect per draw
  )
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
newdata |> median_hdci(PEff)
# A tibble: 1 × 6
   PEff .lower .upper .width .point .interval
  <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
1  26.5   6.40   45.7   0.95 median hdci     
newdata |> summarise(P = mean(PEff > 0))
# A tibble: 1 × 1
      P
  <dbl>
1 0.994
newdata |> summarise(P = mean(PEff > 20))
# A tibble: 1 × 1
      P
  <dbl>
1 0.748
## Evidence ratio and posterior probability for PEff exceeding 20%
newdata |>
  dplyr::select(-c(.chain, .iteration)) |>
  hypothesis("PEff>20")
Hypothesis Tests for class :
       Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
1 (PEff)-(20) > 0     6.63     10.17    -9.11    23.09       2.97      0.75
  Star
1     
---
'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
'*': For one-sided hypotheses, the posterior probability exceeds 95%;
for two-sided hypotheses, the value tested against lies outside the 95%-CI.
Posterior probabilities of point hypotheses assume equal prior probabilities.
## Posterior medians and HPD intervals of the treatment means, as a data frame
newdata <- as.data.frame(emmeans(tobacco.brm3, ~TREATMENT))
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
head(newdata)
 TREATMENT   emmean lower.HPD upper.HPD
 Strong    34.73041  30.72859  38.57838
 Weak      27.46164  22.94352  31.92560

Point estimate displayed: median 
HPD interval probability: 0.95 
## Point estimates with HPD interval bars for each treatment
newdata |>
  ggplot(aes(x = TREATMENT, y = emmean)) +
  geom_pointrange(aes(ymin = lower.HPD, ymax = upper.HPD)) +
  theme_bw()

## Full posterior densities of the treatment means as ridge plots
gather_emmeans_draws(emmeans(tobacco.brm3, ~TREATMENT)) |>
  ggplot(aes(x = .value, y = TREATMENT)) +
  geom_density_ridges(alpha = 0.5, fill = "orange") +
  scale_x_continuous("Average number of lesions") +
  theme_bw()
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
Picking joint bandwidth of 0.431

## Population-level expected predictions for each treatment, one column per
## treatment, one row per posterior draw.
newdat <- tobacco |> tidyr::expand(TREATMENT)
newdata <- tobacco.brm3 |>
  brms::posterior_epred(newdat, re_formula = NA) |> # draws x treatments matrix, random effects excluded
  as.data.frame() |>
  rename_with(~ as.character(newdat$TREATMENT)) |> # assumes columns follow the row order of newdat — TODO confirm
  mutate(
    Eff = Strong - Weak, # absolute effect per draw
    PEff = 100 * Eff / Weak # percentage effect per draw
  )
head(newdata)
    Strong     Weak      Eff     PEff
1 35.00790 28.64045 6.367449 22.23237
2 38.69662 30.74235 7.954266 25.87397
3 35.63156 27.44804 8.183516 29.81457
4 35.27478 26.77384 8.500933 31.75089
5 34.41207 26.77280 7.639263 28.53367
6 30.92354 25.76446 5.159082 20.02402
newdata |> median_hdci(PEff)
# A tibble: 1 × 6
   PEff .lower .upper .width .point .interval
  <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
1  26.5   6.40   45.7   0.95 median hdci     
newdata |> summarise(P = mean(PEff > 0))
      P
1 0.994
newdata |> summarise(P = mean(PEff > 20))
      P
1 0.748
## Posterior draws of the pairwise contrast (Strong - Weak)
newdata <- gather_emmeans_draws(pairs(emmeans(tobacco.brm3, ~TREATMENT)))
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
newdata |> median_hdci()
# A tibble: 1 × 7
  contrast      .value .lower .upper .width .point .interval
  <chr>          <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
1 Strong - Weak   7.27   2.64   12.1   0.95 median hdci     
## OR on percentage scale
## Contrasting on the log scale yields a ratio after back-transformation;
## the final mutate converts the ratio to a percentage change.
newdata <- tobacco.brm3 |>
  emmeans(~TREATMENT) |>
  regrid(trans = "log") |> # re-express the reference grid on the log scale
  pairs() |> # log(Strong) - log(Weak)
  regrid() |> # back-transform: exp() gives the Strong/Weak ratio
  gather_emmeans_draws() |>
  mutate(.value = (.value - 1) * 100) # ratio -> % change
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
newdata |> median_hdci()
# A tibble: 1 × 7
  contrast    .value .lower .upper .width .point .interval
  <chr>        <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
1 Strong/Weak   26.5   6.40   45.7   0.95 median hdci     
newdata |> summarise(P = mean(.value > 0))
# A tibble: 1 × 2
  contrast        P
  <chr>       <dbl>
1 Strong/Weak 0.994
newdata |> summarise(P = mean(.value > 20))
# A tibble: 1 × 2
  contrast        P
  <chr>       <dbl>
1 Strong/Weak 0.748

10 References